{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "您好，我是一个人工智能助手，由 OpenAI 创建。我的设计是基于最新的机器学习技术，并通过 GPT-3 模型（Generative Pretrained Transformer 3）实现能够理解和生成自然语言文本的能力。我可以在多种任务中协助您，比如回答问题、提供信息、帮助解决问题、学习新知识、进行文本创作等。\n",
      "\n",
      "我的知识截止于 2023 年左右，这意味着我在那个时间点之前的历史、事实信息和公认的知识都很熟悉。但是，请注意，我并非实时更新的，并且我没有个人经验或意识，因此我提供的信息是基于数据库和算法生成的，而不是任何个人见解或感知。我的设计宗旨是提供有帮助、准确和中立的回答。如果您有任何问题或需要协助，我随时在这里为您服务！\n"
     ]
    }
   ],
   "source": [
    "from dotenv import load_dotenv\n",
    "load_dotenv(\"openai.env\")\n",
    "\n",
    "#使用openai的官方sdk\n",
    "import openai\n",
    "import os\n",
    "\n",
    "openai.api_base =os.environ.get(\"OPENAI_PROXY\")\n",
    "openai.api_key = os.environ.get(\"OPENAI_API_KEY\")\n",
    "\n",
    "messages = [\n",
    "{\"role\": \"user\", \"content\": \"介绍下你自己\"}\n",
    "]\n",
    "\n",
    "res = openai.ChatCompletion.create(\n",
    "model=\"gpt-4-1106-preview\",\n",
    "messages=messages,\n",
    "stream=False,\n",
    ")\n",
    "\n",
    "print(res['choices'][0]['message']['content'])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Lost in the middle: 长上下文精度问题\n",
    "<hr>"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Looking in indexes: https://pypi.tuna.tsinghua.edu.cn/simple\n",
      "Requirement already satisfied: sentence-transformers in /Users/tomiezhang/.pyenv/versions/3.10.12/envs/langchains/lib/python3.11/site-packages (2.2.2)\n",
      "Requirement already satisfied: transformers<5.0.0,>=4.6.0 in /Users/tomiezhang/.pyenv/versions/3.10.12/envs/langchains/lib/python3.11/site-packages (from sentence-transformers) (4.35.2)\n",
      "Requirement already satisfied: tqdm in /Users/tomiezhang/.pyenv/versions/3.10.12/envs/langchains/lib/python3.11/site-packages (from sentence-transformers) (4.66.1)\n",
      "Requirement already satisfied: torch>=1.6.0 in /Users/tomiezhang/.pyenv/versions/3.10.12/envs/langchains/lib/python3.11/site-packages (from sentence-transformers) (2.1.1)\n",
      "Requirement already satisfied: torchvision in /Users/tomiezhang/.pyenv/versions/3.10.12/envs/langchains/lib/python3.11/site-packages (from sentence-transformers) (0.16.1)\n",
      "Requirement already satisfied: numpy in /Users/tomiezhang/.pyenv/versions/3.10.12/envs/langchains/lib/python3.11/site-packages (from sentence-transformers) (1.26.1)\n",
      "Requirement already satisfied: scikit-learn in /Users/tomiezhang/.pyenv/versions/3.10.12/envs/langchains/lib/python3.11/site-packages (from sentence-transformers) (1.3.2)\n",
      "Requirement already satisfied: scipy in /Users/tomiezhang/.pyenv/versions/3.10.12/envs/langchains/lib/python3.11/site-packages (from sentence-transformers) (1.11.4)\n",
      "Requirement already satisfied: nltk in /Users/tomiezhang/.pyenv/versions/3.10.12/envs/langchains/lib/python3.11/site-packages (from sentence-transformers) (3.8.1)\n",
      "Requirement already satisfied: sentencepiece in /Users/tomiezhang/.pyenv/versions/3.10.12/envs/langchains/lib/python3.11/site-packages (from sentence-transformers) (0.1.99)\n",
      "Requirement already satisfied: huggingface-hub>=0.4.0 in /Users/tomiezhang/.pyenv/versions/3.10.12/envs/langchains/lib/python3.11/site-packages (from sentence-transformers) (0.17.3)\n",
      "Requirement already satisfied: filelock in /Users/tomiezhang/.pyenv/versions/3.10.12/envs/langchains/lib/python3.11/site-packages (from huggingface-hub>=0.4.0->sentence-transformers) (3.13.1)\n",
      "Requirement already satisfied: fsspec in /Users/tomiezhang/.pyenv/versions/3.10.12/envs/langchains/lib/python3.11/site-packages (from huggingface-hub>=0.4.0->sentence-transformers) (2023.10.0)\n",
      "Requirement already satisfied: requests in /Users/tomiezhang/.pyenv/versions/3.10.12/envs/langchains/lib/python3.11/site-packages (from huggingface-hub>=0.4.0->sentence-transformers) (2.31.0)\n",
      "Requirement already satisfied: pyyaml>=5.1 in /Users/tomiezhang/.pyenv/versions/3.10.12/envs/langchains/lib/python3.11/site-packages (from huggingface-hub>=0.4.0->sentence-transformers) (6.0.1)\n",
      "Requirement already satisfied: typing-extensions>=3.7.4.3 in /Users/tomiezhang/.pyenv/versions/3.10.12/envs/langchains/lib/python3.11/site-packages (from huggingface-hub>=0.4.0->sentence-transformers) (4.8.0)\n",
      "Requirement already satisfied: packaging>=20.9 in /Users/tomiezhang/.pyenv/versions/3.10.12/envs/langchains/lib/python3.11/site-packages (from huggingface-hub>=0.4.0->sentence-transformers) (23.2)\n",
      "Requirement already satisfied: sympy in /Users/tomiezhang/.pyenv/versions/3.10.12/envs/langchains/lib/python3.11/site-packages (from torch>=1.6.0->sentence-transformers) (1.12)\n",
      "Requirement already satisfied: networkx in /Users/tomiezhang/.pyenv/versions/3.10.12/envs/langchains/lib/python3.11/site-packages (from torch>=1.6.0->sentence-transformers) (3.2.1)\n",
      "Requirement already satisfied: jinja2 in /Users/tomiezhang/.pyenv/versions/3.10.12/envs/langchains/lib/python3.11/site-packages (from torch>=1.6.0->sentence-transformers) (3.1.2)\n",
      "Requirement already satisfied: regex!=2019.12.17 in /Users/tomiezhang/.pyenv/versions/3.10.12/envs/langchains/lib/python3.11/site-packages (from transformers<5.0.0,>=4.6.0->sentence-transformers) (2023.10.3)\n",
      "Requirement already satisfied: tokenizers<0.19,>=0.14 in /Users/tomiezhang/.pyenv/versions/3.10.12/envs/langchains/lib/python3.11/site-packages (from transformers<5.0.0,>=4.6.0->sentence-transformers) (0.14.1)\n",
      "Requirement already satisfied: safetensors>=0.3.1 in /Users/tomiezhang/.pyenv/versions/3.10.12/envs/langchains/lib/python3.11/site-packages (from transformers<5.0.0,>=4.6.0->sentence-transformers) (0.4.1)\n",
      "Requirement already satisfied: click in /Users/tomiezhang/.pyenv/versions/3.10.12/envs/langchains/lib/python3.11/site-packages (from nltk->sentence-transformers) (8.1.7)\n",
      "Requirement already satisfied: joblib in /Users/tomiezhang/.pyenv/versions/3.10.12/envs/langchains/lib/python3.11/site-packages (from nltk->sentence-transformers) (1.3.2)\n",
      "Requirement already satisfied: threadpoolctl>=2.0.0 in /Users/tomiezhang/.pyenv/versions/3.10.12/envs/langchains/lib/python3.11/site-packages (from scikit-learn->sentence-transformers) (3.2.0)\n",
      "Requirement already satisfied: pillow!=8.3.*,>=5.3.0 in /Users/tomiezhang/.pyenv/versions/3.10.12/envs/langchains/lib/python3.11/site-packages (from torchvision->sentence-transformers) (10.1.0)\n",
      "Requirement already satisfied: MarkupSafe>=2.0 in /Users/tomiezhang/.pyenv/versions/3.10.12/envs/langchains/lib/python3.11/site-packages (from jinja2->torch>=1.6.0->sentence-transformers) (2.1.3)\n",
      "Requirement already satisfied: charset-normalizer<4,>=2 in /Users/tomiezhang/.pyenv/versions/3.10.12/envs/langchains/lib/python3.11/site-packages (from requests->huggingface-hub>=0.4.0->sentence-transformers) (3.3.2)\n",
      "Requirement already satisfied: idna<4,>=2.5 in /Users/tomiezhang/.pyenv/versions/3.10.12/envs/langchains/lib/python3.11/site-packages (from requests->huggingface-hub>=0.4.0->sentence-transformers) (3.4)\n",
      "Requirement already satisfied: urllib3<3,>=1.21.1 in /Users/tomiezhang/.pyenv/versions/3.10.12/envs/langchains/lib/python3.11/site-packages (from requests->huggingface-hub>=0.4.0->sentence-transformers) (2.1.0)\n",
      "Requirement already satisfied: certifi>=2017.4.17 in /Users/tomiezhang/.pyenv/versions/3.10.12/envs/langchains/lib/python3.11/site-packages (from requests->huggingface-hub>=0.4.0->sentence-transformers) (2023.7.22)\n",
      "Requirement already satisfied: mpmath>=0.19 in /Users/tomiezhang/.pyenv/versions/3.10.12/envs/langchains/lib/python3.11/site-packages (from sympy->torch>=1.6.0->sentence-transformers) (1.3.0)\n",
      "\n",
      "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m23.2.1\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m24.0\u001b[0m\n",
      "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n"
     ]
    }
   ],
   "source": [
    "! pip install sentence-transformers"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/tomiezhang/.pyenv/versions/3.10.12/envs/langchains/lib/python3.11/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
      "  from .autonotebook import tqdm as notebook_tqdm\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "[Document(page_content='凯尔特人队是我最喜欢的球队。', metadata={}),\n",
       " Document(page_content='我非常喜欢去看电影。', metadata={}),\n",
       " Document(page_content='带我飞往月球是我最喜欢的歌曲之一。', metadata={}),\n",
       " Document(page_content='篮球是一项伟大的运动。', metadata={}),\n",
       " Document(page_content='拉里.伯德是一位标志性的NBA球员。', metadata={}),\n",
       " Document(page_content='这只是一段随机的文字。', metadata={}),\n",
       " Document(page_content='这是一篇关于波士顿凯尔特人的文件。', metadata={}),\n",
       " Document(page_content='《艾尔登之环》是过去15年最好的游戏之一。', metadata={}),\n",
       " Document(page_content='波士顿凯尔特人队以20分的优势赢得了比赛。', metadata={}),\n",
       " Document(page_content='L.科内特是凯尔特人队最好的球员之一。', metadata={})]"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from langchain.chains import LLMChain,StuffDocumentsChain\n",
    "from langchain.document_transformers import (\n",
    "    LongContextReorder\n",
    ")\n",
    "from langchain.embeddings import HuggingFaceBgeEmbeddings\n",
    "from langchain.vectorstores import  Chroma\n",
    "\n",
    "#使用huggingface托管的开源LLM来做嵌入，MiniLM-L6-v2是一个较小的LLM \n",
    "embedings = HuggingFaceBgeEmbeddings(model_name=\"all-MiniLM-L6-v2\")\n",
    "\n",
    "text = [\n",
    "    \"篮球是一项伟大的运动。\",\n",
    "    \"带我飞往月球是我最喜欢的歌曲之一。\",\n",
    "    \"凯尔特人队是我最喜欢的球队。\",\n",
    "    \"这是一篇关于波士顿凯尔特人的文件。\",\n",
    "    \"我非常喜欢去看电影。\",\n",
    "    \"波士顿凯尔特人队以20分的优势赢得了比赛。\",\n",
    "    \"这只是一段随机的文字。\",\n",
    "    \"《艾尔登之环》是过去15年最好的游戏之一。\",\n",
    "    \"L.科内特是凯尔特人队最好的球员之一。\",\n",
    "    \"拉里.伯德是一位标志性的NBA球员。\"\n",
    "]\n",
    "\n",
    "retrieval = Chroma.from_texts(text,embedings).as_retriever(\n",
    "    search_kwargs={\"k\": 10}\n",
    ")\n",
    "query = \"关于我的喜好都知道什么?\"\n",
    "\n",
    "#根据相关性返回文本块\n",
    "docs = retrieval.get_relevant_documents(query)\n",
    "docs\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[Document(page_content='我非常喜欢去看电影。', metadata={}),\n",
       " Document(page_content='篮球是一项伟大的运动。', metadata={}),\n",
       " Document(page_content='这只是一段随机的文字。', metadata={}),\n",
       " Document(page_content='《艾尔登之环》是过去15年最好的游戏之一。', metadata={}),\n",
       " Document(page_content='L.科内特是凯尔特人队最好的球员之一。', metadata={}),\n",
       " Document(page_content='波士顿凯尔特人队以20分的优势赢得了比赛。', metadata={}),\n",
       " Document(page_content='这是一篇关于波士顿凯尔特人的文件。', metadata={}),\n",
       " Document(page_content='拉里.伯德是一位标志性的NBA球员。', metadata={}),\n",
       " Document(page_content='带我飞往月球是我最喜欢的歌曲之一。', metadata={}),\n",
       " Document(page_content='凯尔特人队是我最喜欢的球队。', metadata={})]"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#对检索结果进行重新排序，根据论文的方案\n",
    "#问题相关性越低的内容块放在中间\n",
    "#问题相关性越高的内容块放在头尾\n",
    "\n",
    "reordering = LongContextReorder()\n",
    "reo_docs = reordering.transform_documents(docs)\n",
    "\n",
    "#头尾共有4个高相关性内容块\n",
    "reo_docs"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "from dotenv import load_dotenv\n",
    "load_dotenv(\"openai.env\")\n",
    "import os\n",
    "api_key = os.environ.get(\"OPENAI_API_KEY\")\n",
    "api_base = os.environ.get(\"OPENAI_API_BASE\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/tomiezhang/.pyenv/versions/3.10.12/envs/langchains/lib/python3.11/site-packages/langchain/utils/utils.py:155: UserWarning: WARNING! api_key is not default parameter.\n",
      "                api_key was transferred to model_kwargs.\n",
      "                Please confirm that api_key is what you intended.\n",
      "  warnings.warn(\n",
      "/Users/tomiezhang/.pyenv/versions/3.10.12/envs/langchains/lib/python3.11/site-packages/langchain/utils/utils.py:155: UserWarning: WARNING! api_base is not default parameter.\n",
      "                api_base was transferred to model_kwargs.\n",
      "                Please confirm that api_base is what you intended.\n",
      "  warnings.warn(\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "'\\n看电影。'"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#检测下这种方案的精度效果\n",
    "from langchain.prompts import PromptTemplate\n",
    "from langchain.llms import OpenAI\n",
    "\n",
    "#设置llm\n",
    "llm = OpenAI(\n",
    "    api_key=api_key,\n",
    "    api_base=api_base,\n",
    "    model=\"gpt-3.5-turbo-instruct\",\n",
    "    temperature=0\n",
    ")\n",
    "\n",
    "document_prompt = PromptTemplate(\n",
    "    input_variables=[\"page_content\"],template=\"{page_content}\"\n",
    ")\n",
    "\n",
    "stuff_prompt_override =\"\"\"Given this text extracts:\n",
    "----------------------------------------\n",
    "{context}\n",
    "----------------------------------------\n",
    "Please answer the following questions:\n",
    "{query}\n",
    "\"\"\"\n",
    "\n",
    "prompt = PromptTemplate(\n",
    "    template=stuff_prompt_override,\n",
    "    input_variables=[\"context\",\"query\"]\n",
    ")\n",
    "\n",
    "llm_chain = LLMChain(\n",
    "    llm=llm,\n",
    "    prompt=prompt\n",
    ")\n",
    "\n",
    "WorkChain = StuffDocumentsChain(\n",
    "    llm_chain=llm_chain,\n",
    "    document_prompt=document_prompt,\n",
    "    document_variable_name=\"context\"\n",
    ")\n",
    "\n",
    "#调用\n",
    "WorkChain.run(\n",
    "    input_documents=reo_docs,\n",
    "    query=\"我最喜欢做什么事情？\"\n",
    ")\n"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "langchains",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
        